#loading the necessary packages
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.3.6     ✔ purrr   0.3.4
✔ tibble  3.1.7     ✔ dplyr   1.0.9
✔ tidyr   1.2.0     ✔ stringr 1.4.0
✔ readr   2.1.2     ✔ forcats 0.5.1
Warning: package ‘ggplot2’ was built under R version 4.2.1Warning: package ‘dplyr’ was built under R version 4.2.1── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(janitor)
Warning: package ‘janitor’ was built under R version 4.2.1
Attaching package: ‘janitor’

The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test
library(readxl)
Warning: package ‘readxl’ was built under R version 4.2.1
library(here)
Warning: package ‘here’ was built under R version 4.2.1here() starts at C:/Users/nico-/OneDrive/Desktop/Codeclan/dirty_data_project/dirty_data_project/task_4/dirty_data_task_4
#loading the data
candy_2015 <- read_excel(here("../raw_data/boing-boing-candy-2015.xlsx")) %>% 
  clean_names()
candy_2016 <- read_excel(here("../raw_data/boing-boing-candy-2016.xlsx")) %>% 
  clean_names()
candy_2017 <- read_excel(here("../raw_data/boing-boing-candy-2017.xlsx")) %>% 
  clean_names()
New names:
candy_2016
candy_2015
candy_2017

#order of operations: #decide which variables to keep for the analysis according to business questions # Pivot data the same for all 3 datasets # perform pivot on all three # Investigate other columns

#keeping variables useful for analysis from 2015 dataset #The variables are a bit confusing. I will only keep only variables that are actual food and drop anything which is not food.

candy_2015 <- candy_2015 %>% 
  select(-c(
    cash_or_other_forms_of_legal_tender, 
    creepy_religious_comics_chick_tracts, 
    hugs_actual_physical_hugs, 
    please_leave_any_remarks_or_comments_regarding_your_choices: 
    please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_beyonce_knowles, 
    dental_paraphenalia, 
    generic_brand_acetaminophen, 
    peterson_brand_sidewalk_chalk
))
candy_2015

#time to rename some columns. Maybe not that necessary but I would like to have my final results with neat names.

candy_2015 <- candy_2015 %>% 
  rename("trick_or_treat" = are_you_going_actually_going_trick_or_treating_yourself,
         "grand_bar" = x100_grand_bar, 
         "brown_globs" = anonymous_brown_globs_that_come_in_black_and_orange_wrappers, 
         "any_candy_bar" = any_full_sized_candy_bar, 
         "brach_without_candy_corn" = brach_products_not_including_candy_corn, 
         "high_fructose_corn_syrup" = vials_of_pure_high_fructose_corn_syrup_for_main_lining_into_your_vein, 
         "restaurant_candy" = candy_that_is_clearly_just_the_stuff_given_out_for_free_at_restaurants, 
         "chick_o_sticks"  = chick_o_sticks_we_don_t_know_what_that_is, 
         "old_marshmallow" = those_odd_marshmallow_circus_peanut_things)
candy_2015

#same thing for the 2016 dataset

#selecting only the necessary columns for final analysis
candy_2016 <- candy_2016 %>% 
  select(-c(cash_or_other_forms_of_legal_tender, 
            creepy_religious_comics_chick_tracts, 
            dental_paraphenalia, 
            generic_brand_acetaminophen, 
            hugs_actual_physical_hugs, 
            person_of_interest_season_3_dvd_box_set_not_including_disc_4_with_hilarious_outtakes, 
            sourpatch_kids_i_e_abominations_of_nature, 
            vials_of_pure_high_fructose_corn_syrup_for_main_lining_into_your_vein, 
            please_list_any_items_not_included_above_that_give_you_joy:
              york_peppermint_patties_ignore))
candy_2016
candy_2016 <- candy_2016 %>% 
  rename("trick_or_treat" = are_you_going_actually_going_trick_or_treating_yourself, 
         "gender" = your_gender, 
         "age" = how_old_are_you, 
         "country" = which_country_do_you_live_in, 
         "state_province" = which_state_province_county_do_you_live_in, 
         "grand_bar" = x100_grand_bar, 
         "brown_globs" = anonymous_brown_globs_that_come_in_black_and_orange_wrappers, 
         )
candy_2016

#sorting the country column

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCiNsb2FkaW5nIHRoZSBuZWNlc3NhcnkgcGFja2FnZXMNCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShqYW5pdG9yKQ0KbGlicmFyeShyZWFkeGwpDQpsaWJyYXJ5KGhlcmUpDQpgYGANCmBgYHtyfQ0KI2xvYWRpbmcgdGhlIGRhdGENCmNhbmR5XzIwMTUgPC0gcmVhZF9leGNlbChoZXJlKCIuLi9yYXdfZGF0YS9ib2luZy1ib2luZy1jYW5keS0yMDE1Lnhsc3giKSkgJT4lIA0KICBjbGVhbl9uYW1lcygpDQpjYW5keV8yMDE2IDwtIHJlYWRfZXhjZWwoaGVyZSgiLi4vcmF3X2RhdGEvYm9pbmctYm9pbmctY2FuZHktMjAxNi54bHN4IikpICU+JSANCiAgY2xlYW5fbmFtZXMoKQ0KY2FuZHlfMjAxNyA8LSByZWFkX2V4Y2VsKGhlcmUoIi4uL3Jhd19kYXRhL2JvaW5nLWJvaW5nLWNhbmR5LTIwMTcueGxzeCIpKSAlPiUgDQogIGNsZWFuX25hbWVzKCkNCmBgYA0KDQpgYGB7cn0NCmNhbmR5XzIwMTYNCmNhbmR5XzIwMTUNCmNhbmR5XzIwMTcNCmBgYA0KDQoNCiNvcmRlciBvZiBvcGVyYXRpb25zOiANCiNkZWNpZGUgd2hpY2ggdmFyaWFibGVzIHRvIGtlZXAgZm9yIHRoZSBhbmFseXNpcyBhY2NvcmRpbmcgdG8gYnVzaW5lc3MgcXVlc3Rpb25zDQojIFBpdm90IGRhdGEgdGhlIHNhbWUgZm9yIGFsbCAzIGRhdGFzZXRzDQojIHBlcmZvcm0gcGl2b3Qgb24gYWxsIHRocmVlDQojIEludmVzdGlnYXRlIG90aGVyIGNvbHVtbnMNCg0KDQoja2VlcGluZyB2YXJpYWJsZXMgdXNlZnVsIGZvciBhbmFseXNpcyBmcm9tIDIwMTUgZGF0YXNldA0KI1RoZSB2YXJpYWJsZXMgYXJlIGEgYml0IGNvbmZ1c2luZy4gSSB3aWxsIG9ubHkga2VlcCBvbmx5IHZhcmlhYmxlcyB0aGF0IGFyZSBhY3R1YWwgZm9vZCBhbmQgZHJvcCBhbnl0aGluZyB3aGljaCBpcyBub3QgZm9vZC4gIA0KYGBge3J9DQpjYW5keV8yMDE1IDwtIGNhbmR5XzIwMTUgJT4lIA0KICBzZWxlY3QoLWMoDQogICAgY2FzaF9vcl9vdGhlcl9mb3Jtc19vZl9sZWdhbF90ZW5kZXIsIA0KICAgIGNyZWVweV9yZWxpZ2lvdXNfY29taWNzX2NoaWNrX3RyYWN0cywgDQogICAgaHVnc19hY3R1YWxfcGh5c2ljYWxfaHVncywgDQogICAgcGxlYXNlX2xlYXZlX2FueV9yZW1hcmtzX29yX2NvbW1lbnRzX3JlZ2FyZGluZ195b3VyX2Nob2ljZXM6IA0KICAgIHBsZWFzZV9lc3RpbWF0ZV90aGVfZGVncmVlc19vZl9zZXBhcmF0aW9uX3lvdV9oYXZlX2Zyb21fdGhlX2ZvbGxvd2luZ19mb2xrc19iZXlvbmNlX2tub3dsZXMsIA0KICAgIGRlbnRhbF9wYXJhcGhlbmFsaWEsIA0KICAgIGdlbmVyaWNfYnJhbmRfYWNldGFtaW5vcGhlbiwgDQogICAgcGV0ZXJzb25fYnJhbmRfc2lkZXdhbGtfY2hhbGsNCikpDQpjYW5keV8yMDE1DQpgYGANCiN0aW1lIHRvIHJlbmFtZSBzb21lIGNvbHVtbnMuIE1heWJlIG5vdCB0aGF0IG5lY2Vzc2FyeSBidXQgSSB3b3VsZCBsaWtlIHRvIGhhdmUgbXkgZmluYWwgcmVzdWx0cyB3aXRoIG5lYXQgbmFtZXMuIA0KDQpgYGB7cn0NCmNhbmR5XzIwMTUgPC0gY2FuZHlfMjAxNSAlPiUgDQogIHJlbmFtZSgidHJpY2tfb3JfdHJlYXQiID0gYXJlX3lvdV9nb2luZ19hY3R1YWxseV9nb2luZ190cmlja19vcl90cmVhdGluZ195b3Vyc2VsZiwNCiAgICAgICAgICJncmFuZF9iYXIiID0geDEwMF9ncmFuZF9iYXIsIA0KICAgICAgICAgImJyb3duX2dsb2JzIiA9IGFub255bW91c19icm93bl9nbG9ic190aGF0X2NvbWVfaW5fYmxhY2tfYW5kX29yYW5nZV93cmFwcGVycywgDQogICAgICAgICAiYW55X2NhbmR5X2JhciIgPSBhbnlfZnVsbF9zaXplZF9jYW5keV9iYXIsIA0KICAgICAgICAgImJyYWNoX3dpdGhvdXRfY2FuZHlfY29ybiIgPSBicmFjaF9wcm9kdWN0c19ub3RfaW5jbHVkaW5nX2NhbmR5X2Nvcm4sIA0KICAgICAgICAgImhpZ2hfZnJ1Y3Rvc2VfY29ybl9zeXJ1cCIgPSB2aWFsc19vZl9wdXJlX2hpZ2hfZnJ1Y3Rvc2VfY29ybl9zeXJ1cF9mb3JfbWFpbl9saW5pbmdfaW50b195b3VyX3ZlaW4sIA0KICAgICAgICAgInJlc3RhdXJhbnRfY2FuZHkiID0gY2FuZHlfdGhhdF9pc19jbGVhcmx5X2p1c3RfdGhlX3N0dWZmX2dpdmVuX291dF9mb3JfZnJlZV9hdF9yZXN0YXVyYW50cywgDQogICAgICAgICAiY2hpY2tfb19zdGlja3MiICA9IGNoaWNrX29fc3RpY2tzX3dlX2Rvbl90X2tub3dfd2hhdF90aGF0X2lzLCANCiAgICAgICAgICJvbGRfbWFyc2htYWxsb3ciID0gdGhvc2Vfb2RkX21hcnNobWFsbG93X2NpcmN1c19wZWFudXRfdGhpbmdzKQ0KY2FuZHlfMjAxNQ0KYGBgDQojc2FtZSB0aGluZyBmb3IgdGhlIDIwMTYgZGF0YXNldCANCmBgYHtyfQ0KI3NlbGVjdGluZyBvbmx5IHRoZSBuZWNlc3NhcnkgY29sdW1ucyBmb3IgZmluYWwgYW5hbHlzaXMNCmNhbmR5XzIwMTYgPC0gY2FuZHlfMjAxNiAlPiUgDQogIHNlbGVjdCgtYyhjYXNoX29yX290aGVyX2Zvcm1zX29mX2xlZ2FsX3RlbmRlciwgDQogICAgICAgICAgICBjcmVlcHlfcmVsaWdpb3VzX2NvbWljc19jaGlja190cmFjdHMsIA0KICAgICAgICAgICAgZGVudGFsX3BhcmFwaGVuYWxpYSwgDQogICAgICAgICAgICBnZW5lcmljX2JyYW5kX2FjZXRhbWlub3BoZW4sIA0KICAgICAgICAgICAgaHVnc19hY3R1YWxfcGh5c2ljYWxfaHVncywgDQogICAgICAgICAgICBwZXJzb25fb2ZfaW50ZXJlc3Rfc2Vhc29uXzNfZHZkX2JveF9zZXRfbm90X2luY2x1ZGluZ19kaXNjXzRfd2l0aF9oaWxhcmlvdXNfb3V0dGFrZXMsIA0KICAgICAgICAgICAgc291cnBhdGNoX2tpZHNfaV9lX2Fib21pbmF0aW9uc19vZl9uYXR1cmUsIA0KICAgICAgICAgICAgdmlhbHNfb2ZfcHVyZV9oaWdoX2ZydWN0b3NlX2Nvcm5fc3lydXBfZm9yX21haW5fbGluaW5nX2ludG9feW91cl92ZWluLCANCiAgICAgICAgICAgIHBsZWFzZV9saXN0X2FueV9pdGVtc19ub3RfaW5jbHVkZWRfYWJvdmVfdGhhdF9naXZlX3lvdV9qb3k6DQogICAgICAgICAgICAgIHlvcmtfcGVwcGVybWludF9wYXR0aWVzX2lnbm9yZSkpDQpjYW5keV8yMDE2DQpgYGANCg0KYGBge3J9DQpjYW5keV8yMDE2IDwtIGNhbmR5XzIwMTYgJT4lIA0KICByZW5hbWUoInRyaWNrX29yX3RyZWF0IiA9IGFyZV95b3VfZ29pbmdfYWN0dWFsbHlfZ29pbmdfdHJpY2tfb3JfdHJlYXRpbmdfeW91cnNlbGYsIA0KICAgICAgICAgImdlbmRlciIgPSB5b3VyX2dlbmRlciwgDQogICAgICAgICAiYWdlIiA9IGhvd19vbGRfYXJlX3lvdSwgDQogICAgICAgICAiY291bnRyeSIgPSB3aGljaF9jb3VudHJ5X2RvX3lvdV9saXZlX2luLCANCiAgICAgICAgICJzdGF0ZV9wcm92aW5jZSIgPSB3aGljaF9zdGF0ZV9wcm92aW5jZV9jb3VudHlfZG9feW91X2xpdmVfaW4sIA0KICAgICAgICAgImdyYW5kX2JhciIgPSB4MTAwX2dyYW5kX2JhciwgDQogICAgICAgICAiYnJvd25fZ2xvYnMiID0gYW5vbnltb3VzX2Jyb3duX2dsb2JzX3RoYXRfY29tZV9pbl9ibGFja19hbmRfb3JhbmdlX3dyYXBwZXJzLCANCiAgICAgICAgICkNCmNhbmR5XzIwMTYNCmBgYA0KI3NvcnRpbmcgdGhlIGNvdW50cnkgY29sdW1uIA0KDQo=